{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 最后一层\n",
    "\n",
    "回归问题：不需要特殊定义的最后一层  \n",
    "二分类问题：`model.add(Activation('sigmoid'))`  \n",
    "多分类问题：`model.add(Activation('softmax'))`  \n",
    "多分类问题输出的是每个类别的概率，可以使用softmax把概率向量转成类别index.  \n",
    "\n",
    "# model.summary()\n",
    "\n",
    "打印完整的结构"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 常用的调参方式\n",
    "\n",
    "## LSTM的memory cell\n",
    "\n",
    "```python\n",
    "model.add(LSTM(n_cells))\n",
    "```\n",
    "\n",
    "## batch_size\n",
    "\n",
    "```python\n",
    "model.fit(X, y, epochs, batch_size=n_batch)\n",
    "```\n",
    "\n",
    "## dropout\n",
    "\n",
    "```python\n",
    "model.add(LSTM(..., dropout=0.4))\n",
    "```\n",
    "\n",
    "## L1L2正则化\n",
    "\n",
    "```python\n",
    "model.add(LSTM(..., kernel_regularizer=L1L2(0.01, 0.01)))\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 用指定参数初始化模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[10.]], dtype=float32)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "\n",
    "class ExampleRandomNormal(tf.keras.initializers.Initializer):\n",
    "\n",
    "    def __init__(self, weights):\n",
    "        self.weights = weights\n",
    "\n",
    "    def __call__(self, shape, dtype=None):\n",
    "        return self.weights\n",
    "\n",
    "    def get_config(self):  # To support serialization\n",
    "        return {'weights': self.weights}\n",
    "\n",
    "# y = W * x + b，定义W=10，b=0\n",
    "layer = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[10]]))\n",
    "model = tf.keras.Sequential([layer])\n",
    "model.predict([1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 输出中间层结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_9\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "dense_15 (Dense)             (None, None, 1)           2         \n",
      "_________________________________________________________________\n",
      "dense_16 (Dense)             (None, None, 1)           2         \n",
      "=================================================================\n",
      "Total params: 4\n",
      "Trainable params: 4\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "\n",
    "class ExampleRandomNormal(tf.keras.initializers.Initializer):\n",
    "\n",
    "    def __init__(self, weights):\n",
    "        self.weights = weights\n",
    "\n",
    "    def __call__(self, shape, dtype=None):\n",
    "        return self.weights\n",
    "\n",
    "    def get_config(self):  # To support serialization\n",
    "        return {'weights': self.weights}\n",
    "\n",
    "# y = 10 * x\n",
    "layer0 = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[10]]), input_shape=[None, 1])\n",
    "# y = 5 * x\n",
    "layer1 = tf.keras.layers.Dense(1, kernel_initializer=ExampleRandomNormal([[5]]))\n",
    "model = tf.keras.Sequential([layer0, layer1])\n",
    "model.summary()\n",
    "layer = tf.keras.backend.function(inputs=[model.layers[0].input], outputs=[model.layers[0].output])\n",
    "# layer(1)  # 不知道为什么会挂掉"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 序列模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # Embedding是指把编码转成指定维度的向量，使具有相近语义的单词其向量也是相近的\n",
    "        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n",
    "        # Flatten也可以用GlobalAveragePooling1D代替，后者速度更快\n",
    "        tf.keras.layers.Flatten().\n",
    "        tf.keras.layers.Dense(6, activation='relu'),\n",
    "        tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One-to-One Model\n",
    "\n",
    "![](assets/2.png)  \n",
    "这样的模型是poor use of the LSTM，因为它put all if the pressure on the internal state of memory，因此not capable of learning accross input or output time steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(1,ANY_VALUE)),\n",
    "        tf.keras.layers.Dense(1)\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(1,ANY_VALUE)))\n",
    "model.add(tf.keras.layers.Dense(1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One-to-Many Model\n",
    "\n",
    "![](assets/3.png)  \n",
    "X可以是一张图像，那么这个模型可以用于给图像生成标题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # 这类模型通常用于对一个图像做分析，因此输入是一个图像，用卷积提取特征\n",
    "        tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)),\n",
    "        tf.keras.layers.LSTM(ANY_VALUE),\n",
    "        # 所有时间步的输出层Dense共享参数，因此使用TimeDistributed\n",
    "        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)))\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "models = tf.keras.models.Sequential([\n",
    "    tf.keras.layers.Conv1D(filter=32, kernel_size=5, strides=5, padding='causal', activation='relu', input_shape=['None', 1])\n",
    "    tf.keras.layers.LSTM(32, return_sequences = True),\n",
    "    tf.keras.layers.LSTM(32, return_sequences = True),\n",
    "    tf.keras.layers.Dense(1),\n",
    "    tf.keras.layers.Lambda(lambda x:x*20)\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Many-to-One Model\n",
    "\n",
    "![](assets/4.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 单层Many-to-One\n",
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "STEPS = 8\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数\n",
    "        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE)),\n",
    "        tf.keras.layers.Dense(1)\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE)))\n",
    "model.add(tf.keras.layers.Dense(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 多层Many-to-One\n",
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "STEPS = 8\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数\n",
    "        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True),\n",
    "        tf.keras.layers.LSTM(ANY_VALUE),\n",
    "        tf.keras.layers.Dense(1)\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True))\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE))\n",
    "model.add(tf.keras.layers.Dense(1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Many-to-Many\n",
    "\n",
    "![](assets/5.png)  \n",
    "x的step数和y的step数可以不相同。  \n",
    "Many-to-Many Model又称为Seq2Seq Model，或Encoder-Decoder Model。  \n",
    "Input Sequence ---(Encoder)--->固定长度的向量---(Decoder)--->Output Sequence  \n",
    "Encoder常常被用来做Sequence Embedding.  \n",
    "\n",
    "Many-to-Many模型可以用于statistical(统计的) machine translation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x steps = y steps\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "STEPS = 8\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # Many-to-One和One-to-One的主要区别在于input_shape的第一个参数是1还是steps数\n",
    "        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True),\n",
    "        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(STEPS,ANY_VALUE), return_sequences=True))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x steps <> y steps\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "ANY_VALUE = 5\n",
    "IN_STEPS = 8\n",
    "OUT_STEPS = 10\n",
    "\n",
    "model = tf.keras.Sequential(\n",
    "    [\n",
    "        # Encoder，第一个参数memory cells决定了Encoder后的固定长度向量的长度。  \n",
    "        tf.keras.layers.LSTM(ANY_VALUE, input_shape=(IN_STEPS,ANY_VALUE)),\n",
    "        # the input time steps are mapped to a fixed sized internal representations\n",
    "        # Encoder产生的output是2D的，为[samples, vector_size]，\n",
    "        # Decoder需要的input是3D的，为[samples, steps, vector_size]\n",
    "        # 中间需要一个适配层，让所有step都使用相同的vector\n",
    "        tf.keras.layers.RepeatVector(OUT_STEPS),\n",
    "        # Decoder\n",
    "        tf.keras.layers.LSTM(ANY_VALUE, return_sequences=True),\n",
    "        # 所有step的output层的参数共享\n",
    "        tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))\n",
    "    ]\n",
    ")\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, input_shape=(IN_STEPS,ANY_VALUE)))\n",
    "model.add(tf.keras.layers.RepeatVector(OUT_STEPS))\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE, return_sequences=True))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cardinality from Time Steps (not Features!)\n",
    "\n",
    "![](assets/6.png)  \n",
    "例如复数的实部和虚数，可以是两个特征，也可以是两个序列"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CNN-LSTM 模型\n",
    "\n",
    "CNN可以特征提取，LSTM可以序列预测  \n",
    "CNN-LSTM模型用于visual time series prediction问题，或者基于图像序列生成文本描述。  \n",
    "CNN-LSTM模型的输入数据是5D的，维度依次为：samples, timesteps, width, height, channels  \n",
    "Flatten()将三维的图像数据拉成一个很长的向量，期望这个向量比原始像素更compressed或salient（显著）。  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 方法一\n",
    "import tensorflow as tf\n",
    "ANY_VALUE = 5\n",
    "\n",
    "cnn = tf.keras.Sequential()\n",
    "cnn.add(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE)))\n",
    "cnn.add(tf.keras.layers.MaxPooling2D(ANY_VALUE))\n",
    "cnn.add(tf.keras.layers.Flatten())\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.TimeDistributed(cnn))\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE))\n",
    "model.add(tf.keras.layers.Dense(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 方法二\n",
    "# 推荐使用这种方法，因为这种方法model.summary会包含全部信息\n",
    "import tensorflow as tf\n",
    "ANY_VALUE = 5\n",
    "\n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(filters=ANY_VALUE,kernel_size=(ANY_VALUE, ANY_VALUE))))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPooling2D(ANY_VALUE)))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten()))\n",
    "model.add(tf.keras.layers.LSTM(ANY_VALUE))\n",
    "model.add(tf.keras.layers.Dense(1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embedding的可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "list index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-6-3e47c55fd73a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0me\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m   \u001b[1;31m# embedding层\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mweights\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_weights\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[0mweights\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m# (10000, 16), 10000代表单词表的大小，16代表每个单词的维度\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;31m# 单词序号转换为单词\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mreverse_word_index\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mword_index\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mIndexError\u001b[0m: list index out of range"
     ]
    }
   ],
   "source": [
    "e = model.layers[0]   # embedding层\n",
    "weights = e.get_weights()[0]\n",
    "weights.shape()  # (10000, 16), 10000代表单词表的大小，16代表每个单词的维度\n",
    "# 单词序号转换为单词\n",
    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items])\n",
    "\n",
    "# meta.csv: 序号 --> 单词  \n",
    "# vecs.csv：序号 --> embedding\n",
    "import io\n",
    "out_v = io.open('vecs.tsv', 'w', encoding='utf-8')\n",
    "out_m = io.open('meta.tsv', 'w', encoding='utf-8')\n",
    "for word_num in range(1, vocab_size): # 0代表OOV\n",
    "    word = reverse_word_index[word_num]\n",
    "    embeddings = weights[word_num]\n",
    "    out_m.write(word + '\\n')\n",
    "    out_v.write('\\t'.join(str(x) for x in embeddings)+'\\n')\n",
    "out_m.close()\n",
    "out_v.close()\n",
    "# https://projector.tensorflow.org"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 一维CNN做NLP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "vocab_size = 10000\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, 64),\n",
    "    tf.keras.layers.Conv1D(128, 5, activation='relu'),\n",
    "    tf.keras.layers.GlobalMaxPooling1D(),\n",
    "    tf.keras.layers.Dense(64, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 语言生成模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "例如句子为：  \n",
    "[1, 2, 3, 4, 5, 6, 7]  \n",
    "[1, 3, 5, 7, 9]  \n",
    "[2, 4, 6, 8]  \n",
    "第一步：得到input_sequence为：  \n",
    "[1, 2]  \n",
    "[1, 2, 3]  \n",
    "[1, 2, 3, 4]  \n",
    "[1, 2, 3, 4, 5]  \n",
    "[1, 2, 3, 4, 5, 6]  \n",
    "[1, 2, 3, 4, 5, 6, 7]  \n",
    "[1, 3]  \n",
    "[1, 3, 5]  \n",
    "[1, 3, 5, 7]  \n",
    "[1, 3, 5, 7, 9]  \n",
    "[2, 4]  \n",
    "[2, 4, 6]  \n",
    "[2, 4, 6, 8]  \n",
    "第二步：以前面补0的方式全部对齐到同样的长度（长度为7）  \n",
    "第三步：每一行的前6个为输入，最后一个为输出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'Tokenizer' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-8-6dd2cfdb0d2c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'.......'\u001b[0m  \u001b[1;31m# 训练数据\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mcorpus\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mtokenizer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTokenizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_on_texts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcorpus\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mtotal_words\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mword_index\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;31m# +1代表OOV\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'Tokenizer' is not defined"
     ]
    }
   ],
   "source": [
    "data = '.......'  # 训练数据\n",
    "corpus = data.lower().split('\\n')\n",
    "tokenizer = Tokenizer()\n",
    "tokenizer.fit_on_texts(corpus)\n",
    "total_words = len(tokenizer.word_index) + 1 # +1代表OOV\n",
    "\n",
    "input_sequences = []\n",
    "for line in corpus:\n",
    "    token_list = tokenizer.texts_to_sequences([line])[0]\n",
    "    for i in range(1, len(token_list)):\n",
    "        n_gram_sequence = token_list[:i+1]\n",
    "        input_sequence.append(n_gram_sequence)\n",
    "        \n",
    "max_sequence_len = max([len(x) for x in input_sequences])\n",
    "input_sequences = np.array(pad_sequences(input_sequences,\n",
    "                                         maxlen = max_sequences_len,\n",
    "                                         padding='pre'))\n",
    "xs = input_sequences[:, :-1]\n",
    "labels = input_sequences[:, -1]\n",
    "ys = tf.keras.utils.to_categorical(labels, num_classes = totoal_words)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 另一种写法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'_VariantDataset' object has no attribute 'numpy'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-9-2c1a40283680>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwindow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshift\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdrop_remainder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mwindow\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m     \u001b[0mprint\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mwindow\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      9\u001b[0m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mwindow\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0mwindow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwindow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     10\u001b[0m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuffer_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mAttributeError\u001b[0m: '_VariantDataset' object has no attribute 'numpy'"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "dataset = tf.data.Dataset.range(10)\n",
    "for val in dataset:\n",
    "    print (val.numpy())\n",
    "dataset = dataset.window(5, shift=1, drop_remainder=True)\n",
    "for window in dataset:\n",
    "    print (window.numpy())\n",
    "dataset = dataset.map(lambda window:window[:-1], window[-1:])\n",
    "dataset = dataset.shuffle(buffer_size=10)\n",
    "dataset = dataset.batch(2).prefetch(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lambda层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "\n",
    "# 这句话放在keras的第一层，相当于数据的预处理\n",
    "# input_shape=[None]代表输入为任意数据\n",
    "tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None])\n",
    "\n",
    "# 这句话放在最后一层，相当于输出数据的后期处理\n",
    "tf.keras.layers.Lambda(lambda x: x*1000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# early stopping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tf.keras.callbacks import EarlyStopping\n",
    "\n",
    "es = EarlyStopping(monitor='val_loss', patience=100)\n",
    "model.fit(..., callbacks=[es])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 模型存储"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 同时存储模型结构和参数\n",
    "\n",
    "# pip install h5py\n",
    "model.save('xxx.h5')\n",
    "\n",
    "from tf.keras.models import load_model\n",
    "\n",
    "model = load_model('xxx.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 只存储模型结构\n",
    "\n",
    "architecture = model.to_json()  # 也可以to_yaml\n",
    "with open('xxx.json', 'wt') as json_file:\n",
    "    json_file.write(architecture)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
